package com.shhc.crawler;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.FileReader;
import java.io.IOException;
import java.io.InputStream;
import java.net.HttpURLConnection;
import java.net.URL;
import java.net.URLConnection;
import java.net.URLEncoder;
import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.apache.commons.lang3.StringEscapeUtils;
import org.apache.poi.xssf.usermodel.XSSFRow;
import org.apache.poi.xssf.usermodel.XSSFSheet;
import org.apache.poi.xssf.usermodel.XSSFWorkbook;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;

import net.coobird.thumbnailator.Thumbnails;

/**
 * Keyword-driven image crawler for Baidu image search.
 *
 * <p>For every keyword the crawler requests a number of result pages, extracts the
 * {@code objURL} entries that point at {@code .jpg} files and hands each URL to a
 * {@code DownloadTask} running on the shared {@link #pool}. Images are stored in one
 * sub-directory per keyword below the download directory.
 *
 * <p>Paths are built with a backslash separator, i.e. the tool targets Windows.
 *
 * @author  Gaogl
 * @date 2018-04-02 created
 */
public class JsoupBaidu {

	/**
	 * Matches one {@code objURL":"http://....jpg} entry of the search response;
	 * group 1 is the image URL itself. Compiled once because it is used for every page.
	 */
	private static final Pattern OBJ_URL_PATTERN = Pattern.compile("objURL\":\"(http://.+?\\.jpg)");

	/** Number of results requested per page; also the step of the {@code pn} offset. */
	private static final int PAGE_SIZE = 30;

	/** Worker pool that executes the download tasks. Whoever drives the crawler must shut it down. */
	public static ExecutorService pool = Executors.newFixedThreadPool(17);

	/**
	 * Entry point: crawls a hard-coded keyword list into a hard-coded directory.
	 *
	 * @param args unused
	 * @throws Exception if crawling fails or waiting for the downloads is interrupted
	 */
	public static void main(String[] args) throws Exception {

		String downloadPath = "G:\\刘佳宜图片";
		// The keywords could alternatively be loaded with readFile(...) or readExcel(...).
		List<String> list = nameList("雪菜肉丝汤 烤鱼 果仁菠菜 花生仁拌菠菜 松子仁拌菠菜 酸辣白菜 蚝油茄子 鱼香茄子 肉沫茄子 麻酱拌茄子"
				+ " 炖排骨 黄豆炖排骨 冬瓜排骨汤 豆角炖排骨 花生炖排骨 莲藕炖排骨 海带炖排骨 胡萝卜玉米炖排骨 虾米豆腐羹 虾仁豆腐 蟹黄豆腐 小葱拌豆腐 香椿黄豆 "
				+ "清蒸鱼 豉汁蒸鱼 清蒸鲈鱼 清蒸草鱼 山药炖牛肉 土豆炖牛肉 胡萝卜炖牛肉 胡萝卜烧牛腩 土豆炖猪肉");
		try {
			// The second argument is the number of pages; one page usually holds 30 images.
			getPictures(list, 50, downloadPath);
		} finally {
			// Always release the non-daemon workers, otherwise a failure would keep the JVM alive.
			pool.shutdown();
		}
	}

	/**
	 * Crawls {@code max} result pages for every keyword, schedules one download per image
	 * URL found, waits until all scheduled downloads have finished and finally removes the
	 * empty files left behind by failed downloads.
	 *
	 * @param keywordList  search keywords; each gets its own sub-directory
	 * @param max          number of result pages to request per keyword
	 * @param downloadPath base directory; a trailing backslash is appended when missing
	 * @throws Exception if waiting for the downloads is interrupted or the base directory
	 *                   does not exist when the clean-up runs
	 */
	public static void getPictures(List<String> keywordList, int max, String downloadPath) throws Exception {
		String baseDir = downloadPath.endsWith("\\") ? downloadPath : downloadPath + "\\";
		List<Future<?>> pendingDownloads = new ArrayList<>();

		for (String keyword : keywordList) {
			String keywordDir = baseDir + keyword + "\\";
			File dir = new File(keywordDir);
			if (!dir.exists()) {
				dir.mkdirs();
			}
			int picCount = 1;
			// Pages are numbered from 0, so "max pages" means 0 .. max - 1.
			for (int page = 0; page < max; page++) {
				sop("正在下载第" + page + "页面");
				long start = System.currentTimeMillis();
				try {
					// The keyword is percent-encoded so that '&', '#', '+' etc. cannot corrupt the query.
					String url = "http://image.baidu.com/search/avatarjson?tn=resultjsonavatarnew&ie=utf-8&word="
							+ URLEncoder.encode(keyword, "UTF-8") + "&cg=star&pn=" + page * PAGE_SIZE
							+ "&rn=30&itg=0&z=0&fr=&width=&height=&lm=-1&ic=0&s=0&st=-1&gsm="
							+ Integer.toHexString(page * PAGE_SIZE);
					sop(url);
					Document document = Jsoup.connect(url).data("query", "Java")
							.userAgent("Mozilla/4.0 (compatible; MSIE 9.0; Windows NT 6.1; Trident/5.0)")
							.timeout(2000).get();
					String xmlSource = StringEscapeUtils.unescapeHtml4(document.toString());
					sop(xmlSource);
					Matcher m = OBJ_URL_PATTERN.matcher(xmlSource);
					while (m.find()) {
						String imageUrl = m.group(1);
						sop(keyword + picCount++ + ":" + imageUrl);
						Runnable task = new DownloadTask(imageUrl, keywordDir);
						// Keep the Future so completion can be awaited before the clean-up.
						pendingDownloads.add(pool.submit(task));
					}
				} catch (IOException e) {
					// A page that cannot be fetched must not stop the remaining pages.
					e.printStackTrace();
				} finally {
					System.out.println("Time is:" + (System.currentTimeMillis() - start) + "ms");
				}
			}
		}

		// Without this wait the messages below would be printed, and files that are still
		// being written could be removed, while downloads are in flight.
		awaitDownloads(pendingDownloads);
		sop("下载完毕");
		delMultyFile(downloadPath);
		sop("已经删除所有空图");
	}

	/**
	 * Blocks until every scheduled download has completed. A download that failed with an
	 * exception is reported and does not prevent waiting for the others.
	 */
	private static void awaitDownloads(List<Future<?>> pendingDownloads) throws InterruptedException {
		for (Future<?> download : pendingDownloads) {
			try {
				download.get();
			} catch (ExecutionException e) {
				e.printStackTrace();
			}
		}
	}

	/**
	 * Deletes all zero-length files found at or below {@code path}. Such files are left
	 * behind when an image could not be downloaded.
	 *
	 * @param path file or directory to clean up
	 * @throws RuntimeException if {@code path} does not exist
	 */
	public static void delMultyFile(String path) {
		File file = new File(path);
		if (!file.exists()) {
			throw new RuntimeException("File \"" + path + "\" NotFound when excute the method of delMultyFile()....");
		}
		deleteEmptyFiles(file);
	}

	/** Recursively removes zero-length regular files below (or equal to) {@code target}. */
	private static void deleteEmptyFiles(File target) {
		if (target.isDirectory()) {
			File[] children = target.listFiles();
			if (children == null) {
				// listFiles() returns null when the directory cannot be read.
				return;
			}
			for (File child : children) {
				deleteEmptyFiles(child);
			}
		} else if (target.isFile() && target.length() == 0L) {
			if (!target.delete()) {
				sop("could not delete empty file \"" + target.getPath() + "\"");
			}
		}
	}

	/**
	 * Splits a keyword string into a list of keywords.
	 *
	 * <p>Exactly one separator is used, chosen in this order of precedence: ASCII comma,
	 * ideographic comma ("、"), space. Without any separator the whole string is a single
	 * keyword. Keywords are trimmed and blank ones are dropped, so doubled separators do
	 * not produce empty search terms.
	 *
	 * @param nameList separated keywords; {@code null} yields an empty list
	 * @return a mutable list of the non-blank keywords in their original order
	 */
	public static List<String> nameList(String nameList) {
		List<String> names = new ArrayList<>();
		if (nameList == null) {
			return names;
		}
		String[] parts;
		if (nameList.contains(",")) {
			parts = nameList.split(",");
		} else if (nameList.contains("、")) {
			parts = nameList.split("、");
		} else if (nameList.contains(" ")) {
			parts = nameList.split(" ");
		} else {
			parts = new String[] { nameList };
		}
		for (String part : parts) {
			String name = part.trim();
			if (!name.isEmpty()) {
				names.add(name);
			}
		}
		return names;
	}

	/**
	 * Reads a text file with the platform default charset and returns its lines joined
	 * together. Line terminators are dropped and no separator is inserted between lines.
	 *
	 * @param filePath path of the text file
	 * @return the concatenated lines read so far; empty when the file cannot be opened
	 */
	public static String readFile(String filePath) {
		File file = new File(filePath);
		System.out.println("文件绝对路径 :" + file.getAbsolutePath());
		StringBuilder content = new StringBuilder();
		// try-with-resources closes the reader and is safe when opening the file fails.
		try (BufferedReader reader = new BufferedReader(new FileReader(file))) {
			String line;
			while ((line = reader.readLine()) != null) {
				content.append(line);
			}
		} catch (IOException e) {
			e.printStackTrace();
		}
		return content.toString();
	}

	/**
	 * Prints {@code obj} on standard output followed by a line break.
	 *
	 * @param obj value to print
	 */
	public static void sop(Object obj) {
		System.out.println(obj);
	}

	/**
	 * Downloads one image synchronously (usable without the thread pool), stores it under
	 * the last path segment of the URL and then rewrites it with Thumbnailator, fitted
	 * into 1280x1024 and rotated by 90 degrees.
	 *
	 * <p>Failures are reported on standard output and never thrown. URLs that do not start
	 * with {@code http} are ignored.
	 *
	 * @param url  address of the image
	 * @param path target directory, created when missing; an empty string means the
	 *             current working directory
	 */
	public static void download(String url, String path) {
		if (url == null || path == null) {
			sop("找不到该网络图片....");
			return;
		}
		if (!url.startsWith("http")) {
			return;
		}

		String downloadName = url.substring(url.lastIndexOf("/") + 1);
		File dirFile = new File(path);
		if (path.length() > 0 && !dirFile.exists()) {
			// mkdirs() also creates missing parents; the download then continues.
			if (dirFile.mkdirs()) {
				sop("creat document file \"" + path.substring(0, path.length() - 1) + "\" success...\n");
			}
		}
		// Resolving against the directory works with and without a trailing separator.
		File file = path.length() > 0 ? new File(dirFile, downloadName) : new File(downloadName);

		HttpURLConnection httpCon = null;
		try {
			httpCon = (HttpURLConnection) new URL(url).openConnection();
			// The response is opened first so that an unreachable image leaves no empty file.
			try (InputStream in = httpCon.getInputStream();
					FileOutputStream fos = new FileOutputStream(file)) {
				byte[] buffer = new byte[8192];
				int read;
				while ((read = in.read(buffer)) != -1) {
					fos.write(buffer, 0, read);
				}
			}
			// Both streams are closed here, so the file can safely be overwritten in place.
			Thumbnails.of(file).size(1280, 1024).rotate(90).toFile(file);
		} catch (FileNotFoundException notFoundE) {
			sop("找不到该网络图片....");
		} catch (IOException ioE) {
			sop("产生IO异常.....");
		} catch (Exception e) {
			e.printStackTrace();
		} finally {
			if (httpCon != null) {
				httpCon.disconnect();
			}
		}
	}

	/**
	 * Reads the sheet at index 1 (the second sheet) of an {@code .xlsx} workbook and
	 * returns all cell values, each followed by a single space.
	 *
	 * <p>Cells of type 1 are read as strings, every other cell as a numeric value. Missing
	 * rows and cells are skipped. I/O problems are reported on standard output and whatever
	 * was collected so far is returned.
	 *
	 * @param filename path of the workbook
	 * @return the space-separated cell values, possibly empty
	 */
	public static String readExcel(String filename) {
		StringBuilder result = new StringBuilder();
		try (FileInputStream in = new FileInputStream(filename);
				XSSFWorkbook workbook = new XSSFWorkbook(in)) {
			XSSFSheet sheet = workbook.getSheetAt(1);
			// getLastRowNum() is the 0-based index of the last row, hence the inclusive bound.
			int lastRowIndex = sheet.getLastRowNum();
			for (int i = 0; i <= lastRowIndex; i++) {
				XSSFRow r = sheet.getRow(i);
				if (r == null) {
					// Rows that were never written are not materialised.
					continue;
				}
				int cellCount = r.getLastCellNum();
				for (int j = 0; j < cellCount; j++) {
					if (r.getCell(j) == null) {
						// Gap inside the row.
						continue;
					}
					// 1 is presumably the int code for string cells in the POI version in use.
					if (r.getCell(j).getCellType() == 1) {
						result.append(r.getCell(j).getStringCellValue());
					} else {
						result.append(r.getCell(j).getNumericCellValue());
					}
					result.append(" ");
				}
			}
		} catch (FileNotFoundException e) {
			System.out.println("文件没找到 : " + e);
		} catch (IOException e) {
			System.out.println("已运行IO异常: " + e);
		}
		return result.toString();
	}

}